#!/usr/bin/env python
# encoding: utf-8
"""
xml.py

Created by  on 2008-09-14.
Copyright (c) 2008 __MyCompanyName__. All rights reserved.
"""
import re
class XMLNode:
	def __init__(self, tag = None, content = None):
		self.tag = tag
		self.properties = {}
		self.childnodes = {}
		self.content = content
	
	def dump(self, skip=None):
		if skip:
			if self.tag in skip:
				return "<"+self.tag+">"+'${'+self.tag+'}'+"</"+self.tag+">"
		result = '<'+self.tag
		keys = self.properties.keys()
		for key in keys:
			result = result+' '+key+'="'+self.properties[key]+'"'
		result = result+">"
		
		for key in self.childnodes.keys():
			nodeArray = self.childnodes[key]
			for childNode in nodeArray: 
				result = result + childNode.dump(skip)
		if self.content:
			result = result+self.content
		result = result+ "</"+self.tag+'>'
		
		return result
	
	def addProperty(self, property, value):
		self.properties[property] = value
	
	def addChildNode(self, tag, node):
		if not self.childnodes.has_key(tag):
			self.childnodes[tag] = [node]
		else:
			self.childnodes[tag].append(node)
	
	def setContent(self, content):
		if self.content is None:
			self.content = content
		else:
			self.content += ' ' + content

class XMLStack:
	def __init__(self):
		self.stack = []
	
	def pushNode(self, node):
		self.stack.append(node)
	
	def popNode(self):
		if len(self.stack) == 0:
			raise "Error", "stack is empty"
		node = self.stack[-1]
		del self.stack[-1]
		return node
	
	def isEmpty(self):
		if len(self.stack) == 0:
			return 1
		return 0
	
	def length(self):
		return len(self.stack)

# REX/For Python
# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
# Technical Report TR 1998-17, School of Computing Science, Simon Fraser
# University, November, 1998.
# Copyright (c) 1998, Robert D. Cameron.
# The following code may be freely used and distributed provided that
# this copyright and citation notice remains intact and that modifications
# or additions are clearly identified.
TextSE = "[^<]+"
UntilHyphen = "[^-]*-"
Until2Hyphens = UntilHyphen + "([^-]" + UntilHyphen + ")*-"
CommentCE = Until2Hyphens + ">?"
UntilRSBs = "[^]]*]([^]]+])*]+"
CDATA_CE = UntilRSBs + "([^]>]" + UntilRSBs + ")*>"
S = "[ \\n\\t\\r]+"
NameStrt = "[A-Za-z_:]|[^\\x00-\\x7F]"
NameChar = "[A-Za-z0-9_:.-]|[^\\x00-\\x7F]"
Name = "(" + NameStrt + ")(" + NameChar + ")*"
QuoteSE = '"[^"]' + "*" + '"' + "|'[^']*'"
DT_IdentSE = S + Name + "(" + S + "(" + Name + "|" + QuoteSE + "))*"
MarkupDeclCE = "([^]\"'><]+|" + QuoteSE + ")*>"
S1 = "[\\n\\r\\t ]"
UntilQMs = "[^?]*\\?+"
PI_Tail = "\\?>|" + S1 + UntilQMs + "([^>?]" + UntilQMs + ")*>"
DT_ItemSE = "<(!(--" + Until2Hyphens + ">|[^-]" + MarkupDeclCE + ")|\\?" + Name + "(" + PI_Tail + "))|%" + Name + ";|" + S
DocTypeCE = DT_IdentSE + "(" + S + ")?(\\[(" + DT_ItemSE + ")*](" + S + ")?)?>?"
DeclCE = "--(" + CommentCE + ")?|\\[CDATA\\[(" + CDATA_CE + ")?|DOCTYPE(" + DocTypeCE + ")?"
PI_CE = Name + "(" + PI_Tail + ")?"
EndTagCE = Name + "(" + S + ")?>?"
AttValSE = '"[^<"]' + "*" + '"' + "|'[^<']*'"
ElemTagCE = Name + "(" + S + Name + "(" + S + ")?=(" + S + ")?(" + AttValSE + "))*(" + S + ")?/?>?"
MarkupSPE = "<(!(" + DeclCE + ")?|\\?(" + PI_CE + ")?|/(" + EndTagCE + ")?|(" + ElemTagCE + ")?)"
XML_SPE = TextSE + "|" + MarkupSPE

# Tag Type Constants
OPEN = 1
OPENCLOSE = 2
CLOSE = 3
UNRECOGNISED = 4

# Unsupported tags (HTML formatting tags for displaying info)
unSupportedTags = ['b', 'i', 'u']

def unquote(quotedString):
	if quotedString[0] <> '"' or quotedString[-1] <> '"':
		return quotedString
	return quotedString[1:len(quotedString)-1]

class XMLParser:
	def __init__(self):
		self.nodeStack = XMLStack()
		self.root = None
	
	def getXMLBuffer(self, fileName):
		if not os.path.exists(fileName):
			return
		f = file(fileName, 'r')
		xmlBuffer = f.read()
		f.close()
		return xmlBuffer
	
	def parseXMLFile(self, fileName):
		xmlBuffer = self.getXMLBuffer(fileName)
		self.parseXML(xmlBuffer)
	
	def parseXML(self, xmlBuffer):
		if xmlBuffer is None or xmlBuffer == '':
			return
		buff = xmlBuffer
		currentNode = None
		while 1:
			m = re.search(XML_SPE, buff)
			if m is None:
				break
			g = m.group(0)
			tag = None
			tagType = None
			if g.startswith('<?'):
				buff = m.string[m.end(0):]
				continue
			if g.startswith('<!'):
				buff = m.string[m.end(0):]
				continue
			if g[0] == '<' and g[1] <> '/':
				if g[len(g)-2] <> '/':
					tag = g[1:len(g)-1].strip()
					tagType = OPEN
				else:
					tag = g[1:len(g)-2].strip()
					tagType = OPENCLOSE
				if tag in unSupportedTags:
					tagType = UNRECOGNISED
					buff = m.string[m.end(0):]
					continue
				else:
					tagParts = tag.split(' ', 1)
					newNode = XMLNode(tag = tagParts[0])
					if len(tagParts) > 1:
						props = tagParts[1].split("' ")
						for i in range(len(props)):
							
							[attr, val] = props[i].split("=\"", 1)
							if val[-1] == "'": val = val[0:len(val)-1]
							newNode.addProperty(attr, val)
					if self.root is None:
						self.root = newNode
						self.nodeStack.pushNode(self.root)
						currentNode = self.root
					else:
						currentNode.addChildNode(tagParts[0], newNode)
						if tagType == OPEN:
							self.nodeStack.pushNode(currentNode)
							currentNode = newNode
			if g[0] == '<' and g[1] == '/':
				tag = g[2:len(g)-1].strip()
				if tag in unSupportedTags:
					tagType = UNRECOGNISED
					buff = m.string[m.end(0):]
					continue
				else:
					tagType = CLOSE
					currentNode = self.nodeStack.popNode()
			if tag is None:
				# This is content for the current tag
				currentNode.setContent(g.strip())
			buff = m.string[m.end(0):]
			# Done parsing
	
	def tag(self, tag,parent=None):
		result = []
	
		if parent==None:
			parent = self.root
		if parent is None:
			print 'return nothing'
			return result
		retnodes = self.traverseElements(parent, tag)
		for entry in retnodes:
			result.append(entry)
		return result
	
	def traverseElements(self, node, tag):
		result = []
		if node.tag == tag:
			result.append(node)
			return result
		for key in node.childnodes.keys():
			nodeArray = node.childnodes[key]
			for childNode in nodeArray:
				retNodes = self.traverseElements(childNode, tag)
				for entry in retNodes:
					result.append(entry)
		return result

